
# remove everything
rm(list=ls())

# =================================================================================
# ---------------------------------------------------------------------
# Drop outliers in dataset "Data" according to the "v"th column
# at each percentage "p" at bottom and top. The default value of
# p is 0.005. v is a vector of numbers.
# ---------------------------------------------------------------------

outlier <- function(Data, v, p=0.005) {
  out <- c()
  for(i in v) {
    out <- rbind(out, quantile(Data[,i], probs=c(p, 1-p)))
  }
  j <- 1
  for(i in v) {
    Data <- Data[Data[,i]>out[j,1] & Data[,i]<out[j,2], ]; print(dim(Data))
    j <- j+1
  }
  return(Data)
}

# =================================================================================

# Load packages
library(foreign)
library(gmm)
library(ggplot2)

# read data industry-by-industry
Data <- read.dta('Ind39.dta')

Data <- outlier(Data, which(names(Data)=="sm"), p=0.005)

# Delete obs according to variable "Foreign"
Data <- subset(Data, F1>=0 & F1<=1 & !is.na(Province) & sm<=1)
# summary(Data)
names(Data)
load("Boot_s_Dt.RData")


# ========================
# map

DLE <- BpF[,1]; SPE <- Bpsw[,1]; TILE <- Bptil[,1]
Data$DLE <- DLE; Data$SPE <- SPE; Data$TILE <- TILE
province <- Data$Province

library(maps)
library(mapdata)
library(maptools)

location <- readShapePoly('bou2_4p.shp')

dat <- read.csv('capital location.csv', header=TRUE)
dat2 = read.csv(text = "??,jd,wd
                Beijing,116.4666667,39.9
                Shanghai,121.4833333,31.23333333
                Guangzhou,113.25,23.13333333"
)


# Show the density of firms by deepth of shade

getColor=function(mapdata,provname,provcol,othercol)
{
  f=function(x,y) ifelse(x %in% y,which(y==x),0);
  colIndex=sapply(mapdata$NAME,f,provname);
  fgc=c(othercol,provcol)[colIndex+1];
  return(fgc);
}

as.character(na.omit(unique(location$NAME)));

provname=as.character(na.omit(unique(location$NAME)));

order.prov <- c(10, 5, 30, 9, 8, 27, 7, 
                1, 6, 3, 26, 28, 29, 13,
                31, 19, 11, 12, 24, 18, 4,
                2, 14, 17, 15, 25, 22, 16,
                21, 32, 20, 33, 23)
othercol <- rgb(250, 250, 250, max=255, alpha=100)
par(mar=rep(0,4))

# -------------------------
# province level SP
SP <- c(aggregate(Data$SPE, list(province), median, na.rm=TRUE)$x,0,0,0); pop2 <- SP[order.prov]
pop2[pop2<0] <- 0

provcol2=rgb(red=255*(1-pop2/max(pop2)),green=255*(1-pop2/max(pop2)),blue=255*(1-pop2/max(pop2)), max=255, alpha=180);
plot(location,col=getColor(location,provname,provcol2,othercol),ylim = c(18, 54));
points(dat2$jd, dat2$wd, pch = 19 , col = "black")
text(dat2$jd, dat2$wd, dat2[, 1], cex = 1.2, col = 'black', pos = c(3, 3, 1))
axis(1, lwd = 0); axis(2, lwd = 0); axis(3, lwd = 0); axis(4, lwd = 0)


# ========================
# histogram

dev.new(width = 600, height = 450, unit = "px", noRStudioGD = TRUE)


df0 <- data.frame(SPE)
ggplot(df0, aes(SPE)) + 
  geom_histogram(aes(y=stat(count) / sum(count)*100), alpha=0.5, bins = 30, na.rm = TRUE, binwidth=0.02,color="black", fill="grey") +
  geom_vline(xintercept = 0, linetype="dashed", size=1)+
  scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="(a)",x="SP", y = "Relative Frequency (%)")+
  xlim(-0.5, 1) + 
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),axis.text=element_text(size=13),
        axis.title=element_text(size=13))


dev.new(width = 600, height = 450, unit = "px", noRStudioGD = TRUE)

df <- data.frame(ME=c(DLE,TILE), Type=c(rep("DL", length(DLE)), rep("TIL", length(DLE))))
ggplot(df, aes(x=ME, color=Type, fill=Type)) +
  geom_histogram(aes(y=stat(count) / sum(count)*100), position="identity", alpha=0.5, bins = 30, na.rm = TRUE, binwidth=0.005)+
  geom_vline(xintercept = 0, linetype="dashed", size=1)+
  scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
  labs(title="(b)",x="DL & TIL", y = "Relative Frequency (%)")+
  xlim(-0.2, 0.4) + 
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5,size = 15,face = "bold"),axis.text=element_text(size=13),
        axis.title=element_text(size=13))



